Petal measures
#-----------------------------------------------------------------------
scatterplotMatrix(~Sepal.Length + Sepal.Width +
Petal.Length + Petal.Width | Species,
data = iris,
smooth = FALSE,
reg.line = FALSE,
ellipse = TRUE,
by.groups = TRUE,
gap = 0,
diagonal = "density")

#-----------------------------------------------------------------------
# Petal.Length & Petal.Width.
scatterplotMatrix(~Petal.Length + Petal.Width | Species,
data = iris,
smooth = FALSE,
reg.line = FALSE,
ellipse = TRUE,
by.groups = TRUE,
gap = 0,
diagonal = "density")

#-----------------------------------------------------------------------
# MLM fitting.
# Iris full model.
m1 <- lm(cbind(Petal.Length,
Petal.Width) ~ Species,
data = iris)
# Iris null model.
m0 <- update(m1, . ~ 1)
# MANOVA with Pillai test.
anova(m1)
## Analysis of Variance Table
##
## Df Pillai approx F num Df den Df Pr(>F)
## (Intercept) 1 0.98786 5939.2 2 146 < 2.2e-16 ***
## Species 2 1.04645 80.7 4 294 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Extract the raw residuals.
r <- residuals(m1)
# Checking the models assumptions on the residuals.
scatterplotMatrix(r,
gap = 0,
smooth = FALSE,
reg.line = FALSE,
ellipse = TRUE,
diagonal = "qqplot")

#-----------------------------------------------------------------------
# Doing the canonical analysis using eigen decomposition.
# Error SSP of the full and null models.
S_full <- crossprod(residuals(m1))
S_null <- crossprod(residuals(m0))
# Extra SSP due the hypothesis.
S_extr <- S_null - S_full
# Eigen decomposition -> canonical analysis.
ei <- eigen(solve(S_full, S_extr))
ei
## eigen() decomposition
## $values
## [1] 19.6773042 0.1047462
##
## $vectors
## [,1] [,2]
## [1,] 0.5407512 -0.3939360
## [2,] 0.8411826 0.9191379
# Cumulated proportion.
cumsum(ei$values)/sum(ei$values)
## [1] 0.994705 1.000000
# Scores (canonical variables).
scores <- as.matrix(m1$model[1]) %*% ei$vectors[, 1:2]
# Plot of the scores.
xyplot(scores[, 2] ~ scores[, 1],
groups = iris$Species,
grid = TRUE,
auto.key = list(columns = 3,
title = "Species",
cex.title = 1),
xlab = "First canonical dimension",
ylab = "Second canonical dimension",
aspect = "iso") +
layer(panel.ellipse(...))

#-----------------------------------------------------------------------
# Using the `candisc` package.
c1 <- candisc(m1, term = "Species")
str(c1)
## List of 15
## $ dfh : num 2
## $ dfe : int 147
## $ eigenvalues: num [1:2] 19.677 0.105
## $ canrsq : num [1:2] 0.9516 0.0948
## $ pct : num [1:2] 99.47 0.53
## $ rank : num 2
## $ ndim : num 2
## $ means :'data.frame': 3 obs. of 2 variables:
## ..$ Can1: num [1:3] -5.84 1.08 4.76
## ..$ Can2: num [1:3] 0.155 -0.446 0.291
## $ factors :'data.frame': 150 obs. of 1 variable:
## ..$ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ term : chr "Species"
## $ terms : chr "Species"
## $ coeffs.raw : num [1:2, 1:2] 1.54 2.4 -2.16 5.04
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "Petal.Length" "Petal.Width"
## .. ..$ : chr [1:2] "Can1" "Can2"
## $ coeffs.std : num [1:2, 1:2] 0.665 0.492 -0.93 1.032
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "Petal.Length" "Petal.Width"
## .. ..$ : chr [1:2] "Can1" "Can2"
## $ structure : num [1:2, 1:2] 0.994 0.987 -0.109 0.163
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "Petal.Length" "Petal.Width"
## .. ..$ : chr [1:2] "Can1" "Can2"
## $ scores :'data.frame': 150 obs. of 3 variables:
## ..$ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## ..$ Can1 : num [1:150] -6.04 -6.04 -6.2 -5.89 -6.04 ...
## ..$ Can2 : num [1:150] 0.0569 0.0569 0.273 -0.1592 0.0569 ...
## - attr(*, "class")= chr "candisc"
##
## Canonical Discriminant Analysis for Species:
##
## CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.951638 19.67730 19.573 99.4705 99.47
## 2 0.094815 0.10475 19.573 0.5295 100.00
##
## Class means:
##
## Can1 Can2
## setosa -5.8362 0.15489
## versicolor 1.0796 -0.44620
## virginica 4.7566 0.29132
##
## std coefficients:
## Can1 Can2
## Petal.Length 0.66460 -0.93005
## Petal.Width 0.49165 1.03197
# The canonical scores.
c("my eigen" = xyplot(scores[, 2] ~ scores[, 1],
groups = iris$Species,
grid = TRUE,
auto.key = list(columns = 3,
title = "Species",
cex.title = 1),
xlab = "First canonical dimension",
ylab = "Second canonical dimension"),
"candisc" = xyplot(Can2 ~ Can1,
groups = Species,
data = c1$scores,
grid = TRUE,
auto.key = list(columns = 3,
title = "Species",
cex.title = 1),
xlab = "First canonical dimension",
ylab = "Second canonical dimension"),
layout = c(1, NA))
## Warning in formals(fun): argument is not a function

## [,1] [,2]
## [1,] 0.5407512 -0.3939360
## [2,] 0.8411826 0.9191379
## Can1 Can2
## Petal.Length 1.544371 -2.161222
## Petal.Width 2.402394 5.042599
c1$coeffs.raw/ei$vectors[, 1:2]
## Can1 Can2
## Petal.Length 2.855973 5.486227
## Petal.Width 2.855973 5.486227
# The plot() method for `candisc` objects.
plot(c1, asp = 1)
## Vector scale factor set to 7.709

# ATTENTION: upened issue! Study the plot.candisc() function to see
# wheather they scale the vectors.
# getS3method(f = "plot", class = "candisc")
# Scores: canonical variates.
xy_can <-
xyplot(Can2 ~ Can1,
groups = Species,
data = c1$scores,
xlab = NULL,
ylab = NULL,
aspect = "iso",
auto.key = list(columns = 3),
grid = TRUE) +
layer(panel.ellipse(...)) +
layer(panel.arrows(0, 0,
c1$coeffs.raw[, 1],
c1$coeffs.raw[, 2],
length = 0.1))
xy_ori <-
xyplot(Petal.Length ~ Petal.Width,
groups = Species,
aspect = "iso",
data = iris,
grid = TRUE) +
layer(panel.ellipse(...))
c("Canonical" = xy_can,
"Original" = xy_ori,
layout = c(1, 2))
## Warning in formals(fun): argument is not a function

#--------------------------------------------
# Univariate analysis of each canical variate.
an0 <- lm(scores ~ Species, data = iris)
summary(an0)
## Response Y1 :
##
## Call:
## lm(formula = Y1 ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.87145 -0.16373 -0.01604 0.21182 0.95942
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.99751 0.04952 20.14 <2e-16 ***
## Speciesversicolor 2.42150 0.07003 34.58 <2e-16 ***
## Speciesvirginica 3.70898 0.07003 52.96 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3501 on 147 degrees of freedom
## Multiple R-squared: 0.9516, Adjusted R-squared: 0.951
## F-statistic: 1446 on 2 and 147 DF, p-value: < 2.2e-16
##
##
## Response Y2 :
##
## Call:
## lm(formula = Y2 ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59429 -0.09219 0.01129 0.08817 0.52182
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.34983 0.02578 -13.571 < 2e-16 ***
## Speciesversicolor -0.10956 0.03645 -3.005 0.00312 **
## Speciesvirginica 0.02487 0.03645 0.682 0.49623
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1823 on 147 degrees of freedom
## Multiple R-squared: 0.09481, Adjusted R-squared: 0.0825
## F-statistic: 7.699 on 2 and 147 DF, p-value: 0.000661
Sepal measures
#-----------------------------------------------------------------------
# Sepal.Length & Sepal.Width.
scatterplotMatrix(~Sepal.Length + Sepal.Width | Species,
data = iris,
smooth = FALSE,
reg.line = FALSE,
ellipse = TRUE,
by.groups = TRUE,
gap = 0,
diagonal = "density")

# Iris full model.
m1 <- lm(cbind(Sepal.Length,
Sepal.Width) ~ Species,
data = iris)
# Iris null model.
m0 <- update(m1, . ~ 1)
# MANOVA with Pillai test.
anova(m1)
## Analysis of Variance Table
##
## Df Pillai approx F num Df den Df Pr(>F)
## (Intercept) 1 0.99311 10518.8 2 146 < 2.2e-16 ***
## Species 2 0.94531 65.9 4 294 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Extract the raw residuals.
r <- residuals(m1)
# Checking the models assumptions on the residuals.
scatterplotMatrix(r,
gap = 0,
smooth = FALSE,
reg.line = FALSE,
ellipse = TRUE,
diagonal = "qqplot")

#-----------------------------------------------------------------------
# Doing the canonical analysis using eigen decomposition.
# Error SSP of the full and null models.
S_full <- crossprod(residuals(m1))
S_null <- crossprod(residuals(m0))
# Extra SSP due the hypothesis.
S_extr <- S_null - S_full
# Eigen decomposition -> canonical analysis.
ei <- eigen(solve(S_full, S_extr))
ei
## eigen() decomposition
## $values
## [1] 4.1717987 0.1609957
##
## $vectors
## [,1] [,2]
## [1,] 0.6118383 0.3624970
## [2,] -0.7909829 0.9319849
# Cumulated proportion.
cumsum(ei$values)/sum(ei$values)
## [1] 0.9628425 1.0000000
# Scores (canonical variables).
scores <- as.matrix(m1$model[1]) %*% ei$vectors[, 1:2]
# Plot of the scores.
xyplot(scores[, 2] ~ scores[, 1],
groups = iris$Species,
grid = TRUE,
auto.key = list(columns = 3,
title = "Species",
cex.title = 1),
xlab = "First canonical dimension",
ylab = "Second canonical dimension",
aspect = "iso") +
layer(panel.ellipse(...))

#-----------------------------------------------------------------------
# Using the `candisc` package.
c1 <- candisc(m1, term = "Species")
str(c1)
## List of 15
## $ dfh : num 2
## $ dfe : int 147
## $ eigenvalues: num [1:2] 4.172 0.161
## $ canrsq : num [1:2] 0.807 0.139
## $ pct : num [1:2] 96.28 3.72
## $ rank : num 2
## $ ndim : num 2
## $ means :'data.frame': 3 obs. of 2 variables:
## ..$ Can1: num [1:3] -2.819 0.994 1.825
## ..$ Can2: num [1:3] 0.0943 -0.5267 0.4324
## $ factors :'data.frame': 150 obs. of 1 variable:
## ..$ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ term : chr "Species"
## $ terms : chr "Species"
## $ coeffs.raw : num [1:2, 1:2] 2.141 -2.768 0.815 2.096
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "Sepal.Length" "Sepal.Width"
## .. ..$ : chr [1:2] "Can1" "Can2"
## $ coeffs.std : num [1:2, 1:2] 1.102 -0.94 0.42 0.712
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "Sepal.Length" "Sepal.Width"
## .. ..$ : chr [1:2] "Can1" "Can2"
## $ structure : num [1:2, 1:2] 0.848 -0.626 0.53 0.779
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "Sepal.Length" "Sepal.Width"
## .. ..$ : chr [1:2] "Can1" "Can2"
## $ scores :'data.frame': 150 obs. of 3 variables:
## ..$ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## ..$ Can1 : num [1:150] -2.82 -1.86 -2.84 -2.78 -3.31 ...
## ..$ Can2 : num [1:150] 0.322 -0.889 -0.633 -0.924 0.45 ...
## - attr(*, "class")= chr "candisc"
##
## Canonical Discriminant Analysis for Species:
##
## CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.80664 4.1718 4.0108 96.2843 96.284
## 2 0.13867 0.1610 4.0108 3.7157 100.000
##
## Class means:
##
## Can1 Can2
## setosa -2.81893 0.094291
## versicolor 0.99379 -0.526724
## virginica 1.82514 0.432433
##
## std coefficients:
## Can1 Can2
## Sepal.Length 1.10226 0.41969
## Sepal.Width -0.94029 0.71201
# The canonical scores.
c("my eigen" = xyplot(scores[, 2] ~ scores[, 1],
groups = iris$Species,
grid = TRUE,
auto.key = list(columns = 3,
title = "Species",
cex.title = 1),
xlab = "First canonical dimension",
ylab = "Second canonical dimension"),
"candisc" = xyplot(Can2 ~ Can1,
groups = Species,
data = c1$scores,
grid = TRUE,
auto.key = list(columns = 3,
title = "Species",
cex.title = 1),
xlab = "First canonical dimension",
ylab = "Second canonical dimension"),
layout = c(1, NA))
## Warning in formals(fun): argument is not a function

## [,1] [,2]
## [1,] 0.6118383 0.3624970
## [2,] -0.7909829 0.9319849
## Can1 Can2
## Sepal.Length 2.141178 0.8152721
## Sepal.Width -2.768109 2.0960764
c1$coeffs.raw/ei$vectors[, 1:2]
## Can1 Can2
## Sepal.Length 3.499582 2.249045
## Sepal.Width 3.499582 2.249045
# The plot() method for `candisc` objects.
plot(c1, asp = 1)
## Vector scale factor set to 5.805

# ATTENTION: upened issue! Study the plot.candisc() function to see
# wheather they scale the vectors.
# getS3method(f = "plot", class = "candisc")
# Scores: canonical variates.
xy_can <-
xyplot(Can2 ~ Can1,
groups = Species,
data = c1$scores,
xlab = NULL,
ylab = NULL,
aspect = "iso",
auto.key = list(columns = 3),
grid = TRUE) +
layer(panel.ellipse(...)) +
layer(panel.arrows(0, 0,
c1$coeffs.raw[, 1],
c1$coeffs.raw[, 2],
length = 0.1))
xy_ori <-
xyplot(Petal.Length ~ Petal.Width,
groups = Species,
aspect = "iso",
data = iris,
grid = TRUE) +
layer(panel.ellipse(...))
c("Canonical" = xy_can,
"Original" = xy_ori,
layout = c(1, 2))
## Warning in formals(fun): argument is not a function

#--------------------------------------------
# Univariate analysis of each canical variate.
an0 <- lm(scores ~ Species, data = iris)
summary(an0)
## Response Y1 :
##
## Call:
## lm(formula = Y1 ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65786 -0.19016 -0.00481 0.15641 0.97619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.35137 0.04041 8.695 6.32e-15 ***
## Speciesversicolor 1.08948 0.05715 19.064 < 2e-16 ***
## Speciesvirginica 1.32703 0.05715 23.220 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2857 on 147 degrees of freedom
## Multiple R-squared: 0.8066, Adjusted R-squared: 0.804
## F-statistic: 306.6 on 2 and 147 DF, p-value: < 2.2e-16
##
##
## Response Y2 :
##
## Call:
## lm(formula = Y2 ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.23470 -0.32349 0.03992 0.26945 1.24542
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.00950 0.06288 79.667 < 2e-16 ***
## Speciesversicolor -0.27612 0.08893 -3.105 0.00228 **
## Speciesvirginica 0.15035 0.08893 1.691 0.09301 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4446 on 147 degrees of freedom
## Multiple R-squared: 0.1387, Adjusted R-squared: 0.127
## F-statistic: 11.83 on 2 and 147 DF, p-value: 1.718e-05